package cn.tyoui.proxy;

import cn.tyoui.pojo.ProxyIP;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * 免费代理ip类
 *
 * @author Tyoui
 * @version 1.8.1
 */
public class CrawlerProxyIP {

    /** Shared HTTP client, reused for every request this instance makes. */
    private CloseableHttpClient httpClient = HttpClients.createDefault();

    /** Proxies accumulated by {@link #proxyRead} and {@link #htmlListIP}. */
    private List<ProxyIP> list = new ArrayList<>();

    /**
     * Downloads the HTML of a free-proxy listing page.
     *
     * @param url proxy listing URL
     * @return the page body as a string
     * @throws Exception if the URL is invalid or the request fails
     *                   (the original version swallowed all errors and
     *                   returned {@code null}; failures now propagate)
     */
    public String crawler(String url) throws Exception {
        CloseableHttpResponse response = null;
        HttpGet httpGet = null;
        try {
            httpGet = new HttpGet(url);
            httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
            httpGet.setHeader("Accept-Encoding", "gzip,deflate,sdch");
            httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
            httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
            // Bound the request so a dead proxy site cannot hang the crawler forever.
            RequestConfig requestConfig = RequestConfig.custom()
                    .setConnectTimeout(10_000)
                    .setSocketTimeout(10_000)
                    .build();
            httpGet.setConfig(requestConfig);
            response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();
            return EntityUtils.toString(entity);
        } finally {
            // BUG FIX: the original called httpGet.clone() here, which only
            // created and discarded a copy of the request object;
            // releaseConnection() actually returns the connection to the pool.
            if (httpGet != null)
                httpGet.releaseConnection();
            if (response != null)
                response.close();
        }
    }

    /**
     * Parses a proxy listing page (an HTML table) and appends each advertised
     * proxy, together with its speed value, to the internal list.
     *
     * @param html raw HTML of the listing page, as returned by {@link #crawler}
     */
    public void proxyRead(String html) {
        try {
            Document document = Jsoup.parse(html);
            Elements elements = document.select("tr");
            // Row 0 is the table header; data rows start at index 1.
            for (int i = 1; i < elements.size(); i++) {
                String[] ip = elements.get(i).text().split(" ");
                if (ip.length < 2)
                    continue; // malformed row — skip it instead of aborting every remaining row
                ProxyIP proxyIP = new ProxyIP(ip[0], Integer.parseInt(ip[1]));
                // Speed is rendered as an inline style, presumably "width:88%";
                // dropping the first 6 chars and the trailing "%" keeps "88".
                // NOTE(review): verify the site really emits exactly "width:NN%".
                String speed = elements.get(i).select(".bar_inner").attr("style");
                if (speed.length() > 6)
                    speed = speed.substring(6, speed.length() - 1);
                proxyIP.setIPSpeed(speed);
                list.add(proxyIP);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Sorts the collected proxies by speed (fastest first) and writes the top
     * {@code maxIp} of them to {@code <cwd>/text/ip.txt}, one per line.
     *
     * @param maxIp maximum number of proxies to write; clamped to the list
     *              size (the original threw IndexOutOfBoundsException when
     *              asked for more proxies than were collected)
     */
    public void writeIP(int maxIp) {
        try {
            String path = new File("").getCanonicalPath() + File.separator + "text";
            File dir = new File(path);
            if (!dir.exists())
                dir.mkdirs();
            path = path + File.separator + "ip.txt";
            // BUG FIX: sort numerically, fastest first. The original compared
            // the speed strings lexicographically, which ranks "9" above "88".
            list.sort(Comparator.comparingDouble(CrawlerProxyIP::speedOf).reversed());
            FileUtils.writeLines(new File(path), list.subList(0, Math.min(maxIp, list.size())));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Numeric speed of a proxy for sorting; proxies whose speed is missing or
     * unparseable (e.g. entries added via {@link #htmlListIP}) sort last.
     */
    private static double speedOf(ProxyIP proxyIP) {
        try {
            return Double.parseDouble(proxyIP.getIPSpeed());
        } catch (RuntimeException ignored) {
            return -1; // null or non-numeric speed
        }
    }

    /**
     * Splits a plain-text proxy feed ("ip:port" entries separated by
     * {@code split}) and appends the parsed proxies to the internal list.
     *
     * @param text  raw proxy feed; may be {@code null}, in which case nothing
     *              is added
     * @param split separator between entries, typically a newline
     * @return the accumulated proxy list (the live internal list, not a copy)
     */
    public List<ProxyIP> htmlListIP(String text, String split) {
        if (text != null) {
            for (String ip : text.split(split)) {
                String[] ipPort = ip.trim().split(":");
                if (ipPort.length != 2)
                    continue;
                try {
                    list.add(new ProxyIP(ipPort[0], Integer.parseInt(ipPort[1])));
                } catch (NumberFormatException ignored) {
                    // Non-numeric port — skip this entry rather than abort the whole feed.
                }
            }
        }
        return list;
    }
}
